# -*- coding:utf-8 -*-
import numpy as np
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge
from sklearn.metrics import mean_absolute_error,\
        mean_squared_error, r2_score

# End-to-end pipeline: load the spreadsheet, fit a Ridge regression on the
# labelled rows, evaluate it on a held-out split, then predict the rows whose
# overall grade is unknown.

# --- Preparing data ---
# Row offset separating the rows used for training (before it) from the rows
# whose overall grade is unknown (from it onward).
N_LABELED = 160

data = pd.read_excel('mlr_data.xls')
data1 = data.iloc[:N_LABELED, 1:]  # training rows, first column dropped
data2 = data.iloc[N_LABELED:, :]   # rows whose overall grade is unknown
print(data1.describe().round(2))

# Every column but the last is a feature; the last column is the target.
X, y = data1.iloc[:, :-1], data1.iloc[:, -1]

# Hold out 20% of the training rows for evaluation; fixed seed so the split
# is reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.20, random_state=53
)

# --- Training the model ---
clf = Ridge()
clf.fit(X_train, y_train)
print("coefficients =", clf.coef_)
print("intercept b =", clf.intercept_)

# --- Making predictions ---
y_pred = clf.predict(X_test)

# --- Evaluating the model ---
mae = mean_absolute_error(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = np.sqrt(mse)  # reuse the MSE instead of recomputing it
r2 = r2_score(y_test, y_pred)
print('Mean Absolute Error =', mae)
print('Mean Squared Error =', mse)
print('Root Mean Squared Error =', rmse)
print('R2 Score:', r2)

# --- Predicting the rows whose overall grade is unknown ---
# Select the same feature columns the model was trained on: drop the first
# column and the (empty) target column.
unknown_features = data2.iloc[:, 1:-1]
y_161_pred = clf.predict(unknown_features)

# Write the rounded predictions back into the target column of the full table.
data.iloc[N_LABELED:, -1] = y_161_pred.round(2)
print(data.iloc[N_LABELED:, :])

# Show every row whose overall grade (known or predicted) is below 60.
print(data[data['总评'] < 60])
